#include <yam/loader.h>

#include <stdexcept>
#include <unordered_map>
#include <unordered_set>
#include <string>
#include <cstring>
#include <regex>

#include <yam/type.h>

namespace yam {

using namespace std;

constexpr char NUL = 0;
constexpr char LF = 0x0A;
constexpr char CR = 0x0D;
constexpr char TAB = 0x09;
constexpr char SPACE = 0x20;
constexpr char COMMA = 0x2C;  // ','
constexpr char LEFT_BRACKET = 0x5B;  // '['
constexpr char RIGHT_BRACKET = 0x5D;  // ']'
constexpr char LEFT_BRACE = 0x7B;  // '{'
constexpr char RIGHT_BRACE = 0x7D;  // '}'
constexpr char POUND = 0x23;  // '#'
constexpr char PERCENT = 0x25;  // '%'
constexpr char HYPHEN = 0x2D;  // '-'
constexpr char EXCLAMATION = 0x21;  // '!'
constexpr char LESS_THAN = 0x3C;  // '<'
constexpr char GREATER_THAN = 0x3E;  // '>'
constexpr char AMPERSAND = 0x26;  // '&'
constexpr char ASTERISK = 0x2A;  // '*'
constexpr char PIPE = 0x7C;  // '|'
constexpr char SINGLE_QUOTE = 0x27;  // '\''
constexpr char DOUBLE_QUOTE = 0x22;  // '"'
constexpr char AT = 0x40;  // '@'
constexpr char BACK_QUOTE = 0x60;  // '`'
constexpr char QUESTION_MARK = 0x3F;  // '?'
constexpr char COLON = 0x3A;  // ':'
constexpr char PERIOD = 0x2E;  // '.'
constexpr char BACKSLASH = 0X5c;  // '\\'
constexpr char PLUS = 0X2B;  // '+'
constexpr char MINUS = 0X2D;  // '-'

enum Context {
    FLOW_IN, FLOW_OUT, BLOCK_IN, BLOCK_OUT
};

enum Chomping {
    CLIP, STRIP, KEEP
};

string repeatString(const string& str, size_t count) {
    string result;
    for (size_t i = 0; i < count; ++i)
        result += str;
    return result;
}

char fromHexCode(const char& c) {
    if ('0' <= c and c <= '9')
        return c - '0';

    char lc = c | 0x20;
    if ('a' <= lc and lc <= 'f')
        return lc - 'a' + 10;

    return -1;
}

char escapedHexLen(const char& c) {
    switch (c) {
        case 'x': return 2;
        case 'u': return 4;
        case 'U': return 8;
        default: return 0;
    }
}

int fromDecimalCode(const char& c) {
    if ('0' <= c and c <= '9')
        return c - 0x30;
    else
        return -1;
}

const char* simpleEscapeSequence(const char& c) {
    switch (c) {
        case 0x30 /* 0 */: return "\x00";
        case 0x61 /* a */: return "\x07";
        case 0x62 /* b */: return "\x08";
        case 0x74 /* t */: return "\x09";
        case 0x09 /* Tab */: return "\x09";
        case 0x6E /* n */: return "\x0A";
        case 0x76 /* v */: return "\x0B";
        case 0x66 /* f */: return "\x0C";
        case 0x72 /* r */: return "\x0D";
        case 0x65 /* e */: return "\x1B";
        case 0x20 /* Space */: return " ";
        case 0x22 /* " */: return "\x22";
        case 0x2F /* / */: return "/";
        case 0x5C /* \ */: return "\x5C";
        case 0x4E /* N */: return "\x85";
        case 0x5F /* _ */: return "\xA0";
        case 0x4C /* L */: return "\u2028";
        case 0x50 /* P */: return "\u2029";
        default: return "";
    }
}

bool* makeSimpleEscapeCheck() {
    static bool checks[256];
    for (auto i = 0; i < 256; ++i)
        checks[i] = std::strlen(simpleEscapeSequence(i)) != 0;
    return checks;
}

const char** makeSimpleEscapeMap() {
    static const char* map[256];
    for (auto i = 0; i < 256; ++i)
        map[i] = simpleEscapeSequence(i);
    return map;
}

bool* simpleEscapeCheck = makeSimpleEscapeCheck();
const char** simpleEscapeMap = makeSimpleEscapeMap();

/* See: https://en.wikipedia.org/wiki/UTF-8#Description */
/* and https://en.wikipedia.org/wiki/UTF-8#Invalid_code_points */
string utf8FromCodePoint(uint32_t cp, bool checkInvalid = true) {
    string utf8;
    if (cp <= 0x7F)
        utf8 += char(cp);
    else if (cp <= 0x7FF) {
        utf8 += char(192 + (cp >> 6));
        utf8 += char(128 + (cp & 64));
    }
    else if (cp <= 0xFFFF) {
        if (checkInvalid and 0xD800 <= cp and cp <= 0xDFFF)
            throw invalid_argument("invalid code point " + std::to_string(cp));
        utf8 += char(224 + (cp >> 12));
        utf8 += char(128 + ((cp & 4096) >> 6));
        utf8 += char(128 + (cp & 64));
    }
    else if (cp <= 0x10FFFF) {
        utf8 += char(240 + (cp >> 18));
        utf8 += char(128 + ((cp & 262144/* 2^18 */) >> 12));
        utf8 += char(128 + ((cp & 4096) >> 6));
        utf8 += char(128 + (cp & 64));
    }
    else
        throw invalid_argument("invalid code point " + std::to_string(cp));
    return utf8;
}

static const std::regex TAG_HANDLE_PATTERN(R"re(^(?:!|!!|![a-z\-]+!)$)re", std::regex::icase);
static const std::regex TAG_URI_PATTERN(R"re(^(?:!|[^,\[\]\{\}])(?:%[0-9a-f]{2}|[0-9a-z\-#;\/\?:@&=\+\$,_\.!~\*'\(\)\[\]])*$)re", std::regex::icase);
static const std::regex FLOW_INDICATOR_PATTERN(R"re([,\[\]\{\}])re", std::regex::icase);
static const std::regex NOT_PRINTABLE_PATTERN("[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x84\x86-\x9F\uFFFE\uFFFF]|[" +
                                        utf8FromCodePoint(0xD800, false) + "-" + utf8FromCodePoint(0xDBFF, false) + "](?![" +
                                        utf8FromCodePoint(0xDC00, false) + "-" + utf8FromCodePoint(0xDFFF, false) + "])|(?:[^" +
                                        utf8FromCodePoint(0xD800, false) + "-" + utf8FromCodePoint(0xDBFF, false)+ "]|^)[" +
                                        utf8FromCodePoint(0xDC00, false) + "-" + utf8FromCodePoint(0xDFFF, false) + "]");
static const std::regex NON_ASCII_LINE_BREAKS("\x85\u2028\u2029");

class Loader {
public:
    explicit Loader(const string& input) : input_(input) {}
    explicit Loader(string&& input) : input_(std::move(input)) {}

    vector<yaml> load() {
        if (input_.size() >= 1) {
            // Add tailing '\n' if not exists
            if (input_.back() != LF && input_.back() != CR)
                input_ += '\n';

            // Strip optional BOM of UTF-8
            if (input_.size() >= 3 &&
                string(input_.cbegin(), input_.cbegin() + 3) == "\xef\xbb\xbf")
                input_ = string(input_.cbegin() + 3, input_.cend());
        }

        length_ = input_.size();

        // Use 0 as string terminator, that significantly simplifies bounds check.
        input_ += '\0';

        // Eat all leading spaces
        while (ch() == SPACE) {
            ++lineIndent_;
            ++pos_;
        }

        while (pos_ < length_ - 1)
            readDocument();

        return documents_;
    }

private:
    string input_;

    const vector<TypePtr>& implicitTypes_ = Type::implicitTypes;
    const unordered_map<string, TypePtr>& typeMap_ = Type::typeMap;

    size_t length_{};  // stream length
    size_t pos_{};  // position
    size_t line_{};  // line number
    size_t lineStart_{};  // line start position in stream
    size_t lineIndent_{};  // line indent amount

    string version_;
    bool checkLineBreaks_{};

    unordered_map<string, string> tagMap_;
    unordered_map<string, yaml> anchorMap_;

    string tag_;
    string anchor_;

    string kind_;
    yaml result_;
    vector<yaml> documents_;

private:
    void throwError(const string& message) {
        string msg = message;
        msg += ": position " + std::to_string(pos_) + ", line " + std::to_string(line_) + ", col " + std::to_string(pos_ - lineStart_);
        throw runtime_error(msg);
    }

    void throwWarning(const string& /*message*/) {
    }

    const char& ch() const {
        return input_[pos_];
    }

    const char& ch(int64_t i) const {
        return input_[pos_ + i];
    }

    bool reachEnd(const char& c) {
        return c == NUL;
    }

    bool reachEnd() {
        return reachEnd(ch());
    }

    bool notEnd(const char& c) {
        return c != NUL;
    }

    bool notEnd() {
        return notEnd(ch());
    }

    bool isEOL(const char& c) {
        return c == LF or c == CR;
    }

    bool isEOL() {
        return isEOL(ch());
    }

    bool isWhiteSpace() {
        auto& c = ch();
        return c == TAB or c == SPACE;
    }

    bool isWhiteSpaceOrEOL(const char& c) {
        return c == TAB or c == SPACE or c == LF or c == CR;
    }

    bool isWhiteSpaceOrEOL() {
        return isWhiteSpaceOrEOL(ch());
    }

    bool isFlowIndicator(const char& c) {
        return c == COMMA or c == LEFT_BRACKET or c == RIGHT_BRACKET or c == LEFT_BRACE or c == RIGHT_BRACE;
    }

    bool isFlowIndicator() {
        return isFlowIndicator(ch());
    }

    void captureSegment(size_t start, size_t end, bool checkJson) {
        string result;
        captureSegment(result, start, end, checkJson);
        if (not result.empty())
            result_ = (result_.is_string() ? result_.get<string>() : string()) + result;
    }

    void captureSegment(string& result, size_t start, size_t end, bool checkJson) {
        if (start >= end)
            return;

        string s = string(input_.cbegin() + start, input_.cbegin() + end);
        if (checkJson) {
            for (auto &c: s)
                if (not (c == 0x09 or (0x20 <= c /*and c <= 0x10FFFF*/)))  // TODO: 0x10FFFF
                    throwError("expected valid JSON character");
        }
        /*else if (std::regex(s, notPrintableRe))
            throwError("the stream contains non-printable characters");*/  // TODO:

        result += s;
    }

    void mergeMappings(yaml& destination, const yaml& source, unordered_set<string>& overridableKeys) {
        if (not source.is_object())
            throwError("cannot merge mappings; the provided source object is unacceptable");

        for (auto it = source.cbegin(); it != source.cend(); ++it) {
            string key = it.key();
            if (not destination.count(key)) {
                destination[key] = it.value();
                overridableKeys.insert(key);
            }
        }
    }

    void storeMappingPair(yaml& result,
                          unordered_set<string>& overridableKeys,
                          const string& keyTag,
                          const yaml& keyNode,
                          const yaml& valueNode) {
        if (keyTag == "tag:yaml.org,2002:merge") {
            if (valueNode.is_array()) {
                for (auto &item: valueNode)
                    mergeMappings(result, item, overridableKeys);
            }
            else
                mergeMappings(result, valueNode, overridableKeys);
        }
        else {
            // TODO: duplicated mapping key
            string key;
            if (keyNode.is_string())
                key = keyNode.get<string>();
            else if (keyNode.is_null())
                key = "null";
            else if (keyNode.is_boolean())
                key = keyNode ? "true" : "false";
            else if (keyNode.is_number_integer())
                key = std::to_string(keyNode.get<int64_t>());
            else if (keyNode.is_number_unsigned())
                key = std::to_string(keyNode.get<uint64_t>());
            else if (keyNode.is_number_float())
                key = std::to_string(keyNode.get<double>());
            else if (keyNode.is_array())
                key = "array";  // TODO
            else if (keyNode.is_object())
                key = "object";  // TODO
            result[key] = valueNode;
            overridableKeys.erase(key);
        }
    }

    void readLineBreak() {
        if (ch() == LF)
            ++pos_;
        else if (ch() == CR) {
            ++pos_;
            if (ch() == LF)
                ++pos_;
        }
        else
            throwError("a line break is expected");

        ++line_;
        lineStart_ = pos_;
    }

    size_t skipSeparationSpace(bool allowComments = true, int64_t checkIndent = -1) {
        size_t lineBreaks = 0;

        while (notEnd()) {
            while (isWhiteSpace())
                ++pos_;

            if (allowComments and ch() == POUND) {
                do { ++pos_; }
                while (not isEOL() and notEnd());
            }

            if (isEOL()) {
                readLineBreak();

                ++lineBreaks;
                lineIndent_ = 0;

                while (ch() == SPACE) {
                    ++lineIndent_;
                    ++pos_;
                }
            }
            else
                break;
        }

        if (checkIndent != -1 and lineBreaks != 0 and static_cast<int64_t>(lineIndent_) < checkIndent)
            throwWarning("deficient indentation");

        return lineBreaks;
    }

    void handleDirective(const string& name, const vector<string>& args) {
        if (name == "YAML") {
            if (not version_.empty())
                throwError("duplication of %YAML directive");

            if (args.size() != 1)
                throwError("YAML directive accepts exactly one argument");

            std::smatch m;
            auto match = std::regex_match(args[0], m, std::regex(R"re(^([0-9]+)\.([0-9]+)$)re"));
            if (not match)
                throwError("ill-formed argument of the YAML directive");

            auto major = std::stoi(m[1]);
            auto minor = std::stoi(m[2]);

            if (major != 1)
                throwError("unacceptable YAML version of the document");
            if (minor != 1 and minor != 2)
                throwWarning("unsupported YAML version of the document");

            version_ = args[0];
            checkLineBreaks_ = minor < 2;
        }

        else if (name == "TAG") {
            if (args.size() != 2)
                throwError("TAG directive accepts exactly two arguments");

            auto& handle = args[0];
            auto& prefix = args[1];

            if (not std::regex_match(handle, TAG_HANDLE_PATTERN))
                throwError("ill-formed tag handle (first argument) of the TAG directive");

            if (tagMap_.count(handle))
                throwError("there is a previously declared suffix for '" + handle + "' tag handle");

            if (not std::regex_match(prefix, TAG_URI_PATTERN))
                throwError("ill-formed tag prefix (second argument) of the TAG directive");

            tagMap_[handle] = prefix;
        }

        else
            throwWarning("unknown document directive '" + name + "'");
    }

    bool reachDocumentSeparator() {
        if ((ch() == HYPHEN or ch() == PERIOD)
            and ch() == ch(1) and ch() == ch(2)) {
            auto& c = ch(3);
            if (reachEnd(c) or isWhiteSpaceOrEOL(c))
                return true;
        }

        return false;
    }

    void writeFoldedLines(size_t count) {
        string result;
        writeFoldedLines(result, count);
        if (not result.empty())
            result_ = (result_.is_string() ? result_.get<string>() : string()) + result;
    }

    void writeFoldedLines(string &result, size_t count) {
        if (count == 1)
            result += ' ';
        else if (count > 1)
            result += repeatString("\n", count -1);
    }

    bool readPlainScala(size_t nodeIndent, bool withinFlowCollection) {
        if (isWhiteSpaceOrEOL() or isFlowIndicator())
            return false;

        if (ch() == POUND and ch() == AMPERSAND and
            ch() ==  ASTERISK and ch() == EXCLAMATION and
            ch() == PIPE and ch() == GREATER_THAN and
            ch() == SINGLE_QUOTE and ch() == DOUBLE_QUOTE and
            ch() == PERCENT and ch() == AT and
            ch() == BACK_QUOTE)
            return false;

        if (ch() == QUESTION_MARK or ch() == HYPHEN) {
            auto& following = ch(1);

            if (isWhiteSpaceOrEOL(following) or (withinFlowCollection and isFlowIndicator(following)))
                return false;
        }

        auto kind = kind_;
        auto result = result_;
        kind_ = "scalar";
        result_ = nullptr;

        auto captureStart = pos_;
        auto captureEnd = pos_;
        bool hasPendingContent = false;

        size_t line = line_;
        while (notEnd()) {
            if (ch() == COLON) {
                auto& following = ch(1);

                if (isWhiteSpaceOrEOL(following) or (withinFlowCollection and isFlowIndicator(following)))
                    break;
            }
            else if (ch() == POUND) {
                auto& preceding = ch(-1);

                if (isWhiteSpaceOrEOL(preceding))
                    break;
            }
            else if ((pos_ == lineStart_ and reachDocumentSeparator()) or (withinFlowCollection and isFlowIndicator()))
                break;
            else if (isEOL()) {
                line = line_;
                size_t lineStart = lineStart_;
                size_t lineIndent = lineIndent_;

                skipSeparationSpace(false);

                if (lineIndent_ >= nodeIndent) {
                    hasPendingContent = true;
                    continue;
                } else {
                    pos_ = captureEnd;
                    line_ = line;
                    lineStart_ = lineStart;
                    lineIndent_ = lineIndent;
                    break;
                }
            }

            if (hasPendingContent) {
                captureSegment(captureStart, captureEnd, false);
                writeFoldedLines(line_ - line);
                captureStart = captureEnd = pos_;
                hasPendingContent = false;
            }

            if (not isWhiteSpace())
                captureEnd = pos_ + 1;

            ++pos_;
        }

        captureSegment(captureStart, captureEnd, false);

        if (not result_.empty())
            return true;

        kind_ = kind;
        result_ = result;
        return false;
    }

    bool readSingleQuotedScalar(size_t nodeIndent) {
        if (ch() != SINGLE_QUOTE)
            return false;

        kind_ = "scalar";
        result_ = nullptr;
        ++pos_;
        auto captureStart = pos_;
        auto captureEnd = pos_;

        while (notEnd()) {
            if (ch() == SINGLE_QUOTE) {
                captureSegment(captureStart, pos_, true);
                ++pos_;

                if (ch() == SINGLE_QUOTE) {
                    captureStart = captureEnd = pos_;
                    ++pos_;
                }
                else
                    return true;
            }
            else if (isEOL()) {
                captureSegment(captureStart, captureEnd, true);
                writeFoldedLines(skipSeparationSpace(false, static_cast<int64_t>(nodeIndent)));
                captureStart = captureEnd = pos_;
            }
            else if (pos_ == lineStart_ and reachDocumentSeparator())
                throwError("unexpected end of the document within a single quoted scalar");
            else {
                ++pos_;
                captureEnd = pos_;
            }
        }

        throwError("unexpected end of the stream within a single quoted scalar");
        return false;  // never go here
    }

    bool readDoubleQuotedScalar(size_t nodeIndent) {
        if (ch() != DOUBLE_QUOTE)
            return false;

        kind_ = "scalar";
        result_ = nullptr;
        ++pos_;
        auto captureStart = pos_;
        auto captureEnd = pos_;

        string result;

        while (notEnd()) {
            if (ch() == DOUBLE_QUOTE) {
                captureSegment(result, captureStart, pos_, true);
                ++pos_;
                if (not result.empty())
                    result_ = result;
                return true;
            }
            else if (ch() == BACKSLASH) {
                captureSegment(result, captureStart, pos_, true);
                ++pos_;

                if (isEOL())
                    skipSeparationSpace(false, static_cast<int64_t>(nodeIndent));
                else if (/* ch() < 256 and*/ simpleEscapeCheck[static_cast<int>(ch())]) {  // TODO: 256
                    result += simpleEscapeMap[static_cast<int>(ch())];
                    ++pos_;
                }
                else if (auto tmp = escapedHexLen(ch())) {
                    auto hexLenght = tmp;
                    uint32_t hexResult = 0;

                    for (; hexLenght > 0; --hexLenght) {
                        ++pos_;

                        if ((tmp = fromHexCode(ch())) >= 0)
                            hexResult = (hexResult << 4) + tmp;
                        else
                            throwError("expected hexadecimal character");
                    }

                    result += utf8FromCodePoint(hexResult);
                    ++pos_;
                }
                else
                    throwError("unknown escape sequence");

                captureStart = captureEnd = pos_;
            }
            else if (isEOL()) {
                captureSegment(result, captureStart, captureEnd, true);
                writeFoldedLines(result, skipSeparationSpace(false, nodeIndent));
                captureStart = captureEnd = pos_;
            }
            else if (pos_ == lineStart_ and reachDocumentSeparator())
                throwError("unexpected end of the document within a double quoted scalar");
            else {
                ++pos_;
                captureEnd = pos_;
            }
        }

        throwError("unexpected end of the stream within a double quoted scalar");
        return false;  // never go here
    }

    bool readFlowCollection(size_t nodeIndent) {
        char terminator;
        bool isMapping;

        if (ch() == LEFT_BRACKET) {
            terminator = RIGHT_BRACKET;
            isMapping = false;
        }
        else if (ch() == LEFT_BRACE) {
            terminator = RIGHT_BRACE;
            isMapping = true;
        }
        else
            return false;

        string tag = tag_;
        string anchor = anchor_;
        bool readNext = true;
        unordered_set<string> overridableKeys;
        yaml result;

        ++pos_;

        while (notEnd()) {
            skipSeparationSpace(true, nodeIndent);

            if (ch() == terminator) {
                ++pos_;
                tag_ = tag;
                anchor_ = anchor;
                kind_ = isMapping ? "mapping" : "sequence";
                result_ = result;

                if (not anchor_.empty())
                    anchorMap_[anchor_] = result;

                return true;
            }
            else if (not readNext)
                throwError("missed comma between flow collection entries");

            bool isPair{}, isExplicitPair{};

            if (ch() == QUESTION_MARK) {
                auto following = ch(1);
                if (isWhiteSpaceOrEOL(following)) {
                    isPair = isExplicitPair = true;
                    ++pos_;
                    skipSeparationSpace(true, nodeIndent);
                }
            }

            auto line = line_;
            composeNode(nodeIndent, Context::FLOW_IN, false, true);
            auto keyTag = tag_;
            yaml keyNode = result_;
            skipSeparationSpace(true, nodeIndent);

            yaml valueNode;
            if ((isExplicitPair or line_ == line) and ch() == COLON) {
                isPair = true;
                ++pos_;
                skipSeparationSpace(true, nodeIndent);
                composeNode(nodeIndent, Context::FLOW_IN, false, true);
                valueNode = result_;
            }

            if (isMapping)
                storeMappingPair(result, overridableKeys, keyTag, keyNode, valueNode);
            else if (isPair) {
                yaml r;
                storeMappingPair(r, overridableKeys, keyTag, keyNode, valueNode);
                result.push_back(r);
            }
            else
                result.push_back(keyNode);

            skipSeparationSpace(true, nodeIndent);

            if (ch() == COMMA) {
                readNext = true;
                ++pos_;
            }
            else
                readNext = false;
        }

        throwError("unexpected end of the stream within a flow collection");
        return false;  // never go here
    }

    bool readBlockScalar(size_t nodeIndent) {
        bool folding;
        if (ch() == PIPE)
            folding = false;
        else if (ch() == GREATER_THAN)
            folding = true;
        else
            return false;

        kind_ = "scalar";
        result_ = nullptr;

        auto chomping = Chomping::CLIP;
        auto detectedIndent = false;
        auto textIndent = nodeIndent;

        while (notEnd()) {
            ++pos_;

            if (ch() == PLUS or ch() == MINUS) {
                if (chomping == Chomping::CLIP)
                    chomping = (ch() == PLUS) ? Chomping::KEEP : Chomping::STRIP;
                else
                    throwError("repeat of a chomping mode identifier");
            }
            else {
                auto tmp = fromDecimalCode(ch());
                if (tmp < 0)
                    break;

                if (tmp == 0)
                    throwError("bad explicit indentation width of a block scalar; it cannot be less than one");
                else if (not detectedIndent) {
                    textIndent = nodeIndent + tmp - 1;
                    detectedIndent = true;
                }
                else
                    throwError("repeat of an indentation width identifier");
            }
        }

        if (isWhiteSpace()) {
            do { ++pos_; }
            while (isWhiteSpace());

            if (ch() == POUND) {
                do { ++pos_; }
                while (not isEOL() and notEnd());
            }
        }

        int emptyLines = 0;
        bool didReadContent = false;
        bool atMoreIndented = false;

        string result;

        while (notEnd()) {
            readLineBreak();
            lineIndent_ = 0;

            while ((not detectedIndent or lineIndent_ < textIndent) and (ch() == SPACE)) {
                ++lineIndent_;
                ++pos_;
            }

            if (not detectedIndent and lineIndent_ > textIndent)
                textIndent = lineIndent_;

            if (isEOL()) {
                ++emptyLines;
                continue;
            }

            // End of the scalar
            if (lineIndent_ < textIndent) {
                // Perform the chomping
                if (chomping == Chomping::KEEP)
                    result += string(didReadContent ? emptyLines + 1 : emptyLines, '\n');
                else if (chomping == Chomping::CLIP){
                    if (didReadContent)  // i.e. only if the scalar if not empty
                        result += '\n';
                }

                // Break this `while` cycle and go to the function's epilogue
                break;
            }

            // Folded style: use fancy rules to handle line breaks
            if (folding) {
                // Lines starting with white space characters (more-indented lines) are not folded
                if (isWhiteSpace()) {
                    atMoreIndented = true;
                    // except for the first content line
                    result += string(didReadContent ? emptyLines + 1 : emptyLines, '\n');
                } else if (atMoreIndented) {  // End of more-indented block
                    atMoreIndented = false;
                    result += string(emptyLines + 1, '\n');
                } else if (emptyLines == 0) {  // Just one line break - perceive as the same line
                    if (didReadContent)  // i.e. only if we have already read some scalar content
                        result += ' ';
                } else {  // Several line breaks - perceive as different lines
                    result += string(emptyLines, '\n');
                }
            }
            else {  // Literal style: just add exact number of line breaks between content lines
                // Keep all line breaks except the header line break
                result += string(didReadContent ? emptyLines + 1 : emptyLines, '\n');
            }

            didReadContent = true;
            detectedIndent = true;
            emptyLines = 0;
            auto captureStart = pos_;

            while (not isEOL() and notEnd())
                ++pos_;

            captureSegment(result, captureStart, pos_, false);
        }

        result_ = result;

        return true;
    }

    bool readBlockSequence(size_t nodeIndent) {
        bool detected = false;
        auto tag = tag_;
        auto anchor = anchor_;
        yaml result;

        while (notEnd()) {
            if (ch() != HYPHEN)
                break;

            auto following = input_[pos_ + 1];

            if (not isWhiteSpaceOrEOL(following))
                break;

            detected = true;
            ++pos_;

            if (skipSeparationSpace()) {
                if (lineIndent_ <= nodeIndent) {
                    result.push_back(nullptr);
                    continue;
                }
            }

            auto line = line_;
            composeNode(nodeIndent, Context::BLOCK_IN, false, true);
            result.push_back(result_);
            skipSeparationSpace();

            if ((line_ == line or lineIndent_ > nodeIndent) and notEnd())
                throwError("bad indentation of a sequence entry");
            else if (lineIndent_ < nodeIndent)
                break;
        }

        if (not anchor_.empty())
            anchorMap_[anchor] = result;

        if (detected) {
            tag_ = tag;
            anchor_ = anchor;
            kind_ = "sequence";
            result_ = result;
            return true;
        }
        return false;
    }

    bool readBlockMapping(size_t nodeIndent, size_t flowIndent) {
        auto tag = tag_;
        auto anchor = anchor_;
        auto detected = false;
        string keyTag;
        yaml keyNode;
        yaml valueNode;
        bool atExplicitKey = false;
        unordered_set<string> overridableKeys;
        bool allowCompact = false;
        yaml result;

        while (notEnd()) {
            auto following = ch(1);
            auto line = line_;  // Save the current line

            // Explicit notation case. There are two separate blocks:
            // first for the key (denoted by '?') and second for the value (denoted by ':')
            if ((ch() == QUESTION_MARK or ch() == COLON) and isWhiteSpaceOrEOL(following)) {
                if (ch() == QUESTION_MARK) {
                    if (atExplicitKey) {
                        storeMappingPair(result, overridableKeys, keyTag, keyNode, nullptr);
                        keyTag = "";
                        keyNode = "";
                        valueNode = nullptr;
                    }

                    detected = true;
                    atExplicitKey = true;
                    allowCompact = true;
                }
                else if (atExplicitKey) {
                    // i.e. ':' appears after the explicit key
                    atExplicitKey = false;
                    allowCompact = true;
                }
                else
                    throwError("incomplete explicit mapping pair; a key node is missed");

                ++pos_;
            }
                // Implicit notation case. Flow-style node as the key first, then ':', and the value
            else if (composeNode(flowIndent, Context::FLOW_OUT, false, true)) {
                if (line_ == line) {
                    while (isWhiteSpace())
                        ++pos_;

                    if (ch() == COLON) {
                        ++pos_;

                        if (not isWhiteSpaceOrEOL())
                            throwError("a whitespace character is expected after the "
                                           "key-value separator within a block mapping");

                        if (atExplicitKey) {
                            storeMappingPair(result, overridableKeys, keyTag, keyNode, nullptr);
                            keyTag = "";
                            keyNode = "";
                            valueNode = nullptr;
                        }

                        detected = true;
                        atExplicitKey = false;
                        allowCompact = false;
                        keyTag = tag_;
                        keyNode = result_;
                    }
                    else if (detected)
                        throwError("can not read an implicit mapping pair; a colon is missed");
                    else {
                        tag_ = tag;
                        anchor_ = anchor;
                        return true;  // Keep the result of `composeNode`
                    }
                }
                else if (detected)
                    throwError("can not read a block mapping entry; a multiline key may not be an implicit key");
                else {
                    tag_ = tag;
                    anchor_ = anchor;
                    return true;  // Keep the result of `composeNode`
                }
            }
            else
                break;  // Reading is done. Go to the epilogue

            // Common reading code for both explicit and implicit notations
            if (line_ == line or lineIndent_ > nodeIndent) {
                if (composeNode(nodeIndent, Context::BLOCK_OUT, true, allowCompact)) {
                    if (atExplicitKey)
                        keyNode = result_;
                    else
                        valueNode = result_;
                }

                if (not atExplicitKey) {
                    storeMappingPair(result, overridableKeys, keyTag, keyNode, valueNode);
                    keyTag = "";
                    keyNode = "";
                    valueNode = nullptr;
                }

                skipSeparationSpace();
            }

            if (lineIndent_ > nodeIndent and notEnd())
                throwError("bad indentation of a mapping entry");
            else if (lineIndent_ < nodeIndent)
                break;
        }

        // Epilogue

        // Special case: last mapping's node contains only the key in explicit notation
        if (atExplicitKey)
            storeMappingPair(result, overridableKeys, keyTag, keyNode, nullptr);

        // Expose the resulting mapping
        if (detected) {
            tag_ = tag;
            anchor_ = anchor;
            kind_ = "mapping";
            result_ = result;
        }

        return detected;
    }

    bool readTagProperty() {
        if (ch() != EXCLAMATION)
            return false;

        if (not tag_.empty())
            throwError("duplication of a tag property");

        ++pos_;

        bool isVerbatim = false, isNamed = false;
        string tagHandle;

        if (ch() == LESS_THAN) {
            isVerbatim = true;
            ++pos_;
        }
        else if (ch() == EXCLAMATION) {
            isNamed = true;
            tagHandle = "!!";
            ++pos_;
        }
        else
            tagHandle = "!";

        auto position = pos_;
        string tagName;

        if (isVerbatim) {
            do { ++pos_; }
            while (notEnd() and ch() != GREATER_THAN);

            if (pos_ < length_) {
                tagName = string(input_.cbegin() + position, input_.cbegin() + pos_);
                ++pos_;
            }
            else
                throwError("unexpected end of the stream within a verbatim tag");
        }
        else {
            while (notEnd() and not isWhiteSpaceOrEOL()) {
                if (ch() == EXCLAMATION) {
                    if (not isNamed) {
                        tagHandle = string(input_.cbegin() + position - 1, input_.cbegin() + pos_ + 1);

                        if (not std::regex_match(tagHandle, TAG_HANDLE_PATTERN))
                            throwError("named tag handle cannot contain such characters");

                        isNamed = true;
                        position = pos_ + 1;
                    }
                    else
                        throwError("tag suffix cannot contain exclamation marks");
                }

                ++pos_;
            }

            tagName = string(input_.cbegin() + position, input_.cbegin() + pos_);

            if (std::regex_match(tagName, FLOW_INDICATOR_PATTERN))
                throwError("tag suffix cannot contain flow indicator characters");
        }

        if (not tagName.empty() and not std::regex_match(tagName, TAG_URI_PATTERN))
            throwError("tag name cannot contain such characters: " + tagName);

        if (isVerbatim)
            tag_ = tagName;
        else if (tagMap_.count(tagHandle))
            tag_ = tagMap_[tagHandle] + tagName;
        else if (tagHandle == "!")
            tag_ = "!" + tagName;
        else if (tagHandle == "!!")
            tag_ = "tag:yaml.org,2002:" + tagName;
        else
            throwError("undeclared tag handle '" + tagHandle + "'");

        return true;
    }


    bool readAnchorProperty() {
        if (ch() != AMPERSAND)
            return false;

        if (not anchor_.empty())
            throwError("duplication of an anchor property");

        ++pos_;
        auto position = pos_;

        while (notEnd() and not isWhiteSpaceOrEOL() and not isFlowIndicator())
            ++pos_;

        if (pos_ == position)
            throwError("name of an anchor node must contain at least one character");

        anchor_ = string(input_.cbegin() + position, input_.cbegin() + pos_);
        return true;
    }

    bool readAlias() {
        if (ch() != ASTERISK)
            return false;

        ++pos_;
        auto position = pos_;

        while (notEnd() and not isWhiteSpaceOrEOL() and not isFlowIndicator())
            ++pos_;

        if (pos_ == position)
            throwError("name of an alias node must contain at least one character");

        string alias(input_.cbegin() + position, input_.cbegin() + pos_);

        if (not anchorMap_.count(alias))
            throwError("unidentified alias '" + alias + "'");

        result_ = anchorMap_[alias];
        skipSeparationSpace();
        return true;
    }



    bool composeNode(size_t parentIndent, Context nodeContext, bool allowToSeek, bool allowCompact) {
        static const int8_t INDENT_MORE = 1, INDENT_EQUAL = 0, INDENT_LESS = -1;

        int8_t indentStatus = INDENT_MORE;
        bool atNewLine = false;
        bool hasContent = false;

        // TODO: listener

        tag_.clear();
        anchor_.clear();
        kind_.clear();
        result_ = nullptr;

        bool allowBlockStyles = (nodeContext == Context::BLOCK_OUT or nodeContext == Context::BLOCK_IN);
        bool allowBlockScalars = allowBlockStyles;
        bool allowBlockCollections = allowBlockStyles;

        if (allowToSeek) {
            if (skipSeparationSpace()) {
                atNewLine = true;

                if (lineIndent_ > parentIndent)
                    indentStatus = INDENT_MORE;
                else if (lineIndent_ == parentIndent)
                    indentStatus = INDENT_EQUAL;
                else if (lineIndent_ < parentIndent)
                    indentStatus = INDENT_LESS;
            }
        }

        if (indentStatus == INDENT_MORE) {
            while (readTagProperty() || readAnchorProperty()) {
                if (skipSeparationSpace()) {
                    atNewLine = true;
                    allowBlockCollections = allowBlockStyles;

                    if (lineIndent_ > parentIndent)
                        indentStatus = INDENT_MORE;
                    else if (lineIndent_ == parentIndent)
                        indentStatus = INDENT_EQUAL;
                    else if (lineIndent_ < parentIndent)
                        indentStatus = INDENT_LESS;
                } else
                    allowBlockCollections = false;
            }
        }

        if (allowBlockCollections)
            allowBlockCollections = atNewLine || allowCompact;

        if (indentStatus == INDENT_MORE || nodeContext == Context::BLOCK_OUT) {
            size_t flowIndent;
            if (nodeContext == Context::FLOW_IN || nodeContext == Context::FLOW_OUT)
                flowIndent = parentIndent;
            else
                flowIndent = parentIndent + 1;

            size_t blockIndent = pos_ - lineStart_;

            if (indentStatus == INDENT_MORE) {
                if ((allowBlockCollections and
                     (readBlockSequence(blockIndent) or readBlockMapping(blockIndent, flowIndent))) or
                    readFlowCollection(flowIndent)) {
                    hasContent = true;
                }
                else {
                    if ((allowBlockScalars and readBlockScalar(flowIndent)) or
                        readSingleQuotedScalar(flowIndent) or
                        readDoubleQuotedScalar(flowIndent)) {
                        hasContent = true;
                    }
                    else if (readAlias()) {
                        hasContent = true;

                        if (not tag_.empty() or not anchor_.empty())
                            throwError("alias node should not have any properties");
                    }
                    else if (readPlainScala(flowIndent, nodeContext == Context::FLOW_IN)) {
                        hasContent = true;

                        if (tag_.empty())
                            tag_ = "?";
                    }

                    if (not anchor_.empty())
                        anchorMap_[anchor_] = result_;
                }
            }
            else if (indentStatus == INDENT_EQUAL) {
                // Special case: block sequences are allowed to have same indentation level as the parent
                hasContent = allowBlockCollections and readBlockSequence(blockIndent);
            }
        }

        if (not tag_.empty() and tag_ != "!") {
            if (tag_ == "?") {
                for (auto type: implicitTypes_) {

                    /* Implicit resolving is not allowed for non-scalar types, and '?'
                     * not-specific tag is only assigned to plain scalars. So, it isn't
                     * needed to check for 'kind' conformity.
                     */
                    if (type->resolve(result_)) {  // `result_` updated in resolver if matched
                        result_ = type->construct(result_);
                        tag_ = type->tag;
                        if (not anchor_.empty())
                            anchorMap_[anchor_] = result_;
                        break;
                    }
                }
            }
            else if (typeMap_.count(tag_)) {
                auto type = typeMap_.at(tag_);

                if (not result_.empty() and type->kind != kind_)
                    throwError("unacceptable node kind for !<" + tag_ + "> tag; it should be \""
                               + type->kind + "\", not \"" + kind_ + "\"");

                if (not type->resolve(result_))  // `result_` updated in resolver if matched
                    throwError("cannot resolve a node with !<" + tag_ + "> explicit tag");
                else {
                    result_ = type->construct(result_);
                    if (not anchor_.empty())
                        anchorMap_[anchor_] = result_;
                }
            }
            else
                throwError("unknown tag !<" + tag_ + ">");
        }

        return not tag_.empty() or not anchor_.empty() or hasContent;
    }

    void readDocument() {
        auto documentStart = pos_;
        bool hasDirectives = false;

        // Handle directives
        while (notEnd()) {
            skipSeparationSpace();

            if (lineIndent_ > 0 or ch() != PERCENT)
                break;

            hasDirectives = true;
            auto position = ++pos_;

            while (notEnd() and not isWhiteSpaceOrEOL())
                ++pos_;

            string directiveName(input_.cbegin() + position, input_.cbegin() + pos_);
            if (directiveName.empty())
                throwError("directive name must not be empty");

            vector<string> directiveArgs;

            while (notEnd()) {
                while (isWhiteSpace())
                    ++pos_;

                if (ch() == POUND) {
                    do { ++pos_; }
                    while (not isEOL() and notEnd());

                    break;
                }

                if (isEOL())
                    break;

                position = pos_;

                while (notEnd() and not isWhiteSpaceOrEOL())
                    ++pos_;

                directiveArgs.emplace_back(input_.cbegin() + position, input_.cbegin() + pos_);
            }

            if (notEnd())
                readLineBreak();

            handleDirective(directiveName, directiveArgs);
        }

        skipSeparationSpace();

        if (lineIndent_ == 0
            and ch() == HYPHEN and ch(1) == HYPHEN and ch(2) == HYPHEN) {
            pos_ += 3;
            skipSeparationSpace();
        }
        else if (hasDirectives)
            throwError("directives end mark is expected");

        // Compose nodes recursively
        composeNode(lineIndent_ - 1, Context::BLOCK_OUT, false, true);
        skipSeparationSpace();

        if (checkLineBreaks_ and std::regex_match(input_.cbegin() + documentStart, input_.cbegin() + pos_, NON_ASCII_LINE_BREAKS))
            throwWarning("non-ASCII line breaks are interpreted as content");

        documents_.push_back(result_);

        if (pos_ == lineStart_ and reachDocumentSeparator()) {
            if (ch() == PERIOD) {
                pos_ += 3;
                skipSeparationSpace();
            }

            return;
        }

        if (pos_ < length_ - 1)
            throwError("end of the stream or a document separator is expected");
        else
            return;
    }
};

vector<yaml> loadAll(string&& input) {
    Loader loader(std::move(input));
    return loader.load();
}

yaml load(string&& input) {
    auto docs = loadAll(std::move(input));

    if (docs.size() == 1)
        return std::move(docs[0]);
    else if (docs.empty())
        throw invalid_argument("expected a single document in the stream, but found none");
    else
        throw invalid_argument("expected a single document in the stream, but found more");
}

}
